## Read every workbook from the "derivable datasets" folder into a named list.
## lapply() always returns a list; sapply() could collapse the data frames into
## a matrix if they all happened to have the same number of columns.
files_dd <- list.files(path = "./raw_data/3_Derivable_Datasets/", pattern = "\\.xls",
                       all.files = FALSE, full.names = FALSE)
fdd <- lapply(files_dd, function(x) {
  read.xlsx(paste0("./raw_data/3_Derivable_Datasets/", x), na.strings = NA)
})
## List names are the file names cut at the first dot
names(fdd) <- str_split_i(files_dd, "\\.", 1)
## Files from the second folder share a common "disease": an explanatory row
## right below the column names. No trivial solution was found in code, so that
## row was simply removed in the Excel files themselves.
files_fet <- list.files(path = "./raw_data/Final_export_tables/", pattern = "\\.xls",
                        all.files = FALSE, full.names = FALSE)
fnet <- lapply(files_fet, function(x) {
  read.xlsx(paste0("./raw_data/Final_export_tables/", x), na.strings = NA, startRow = 1)
})
names(fnet) <- str_split_i(files_fet, "\\.", 1)
Начнем собирать датасет с файла с
демографическими данными. Затем добавим все сырые файлы, в которых
единицей наблюдения является пациент (это датафреймы с данными о
показаниях к пересадке, профилактике, лечении и выживаемости)
# Patient-level tables are merged onto the demographics table by SUBJID.
# If the explanatory second row was NOT removed from the Excel files,
# append `%>% slice(-1)` to each of the five tables used below.
common_df <- fnet$DM_20230119_120304 %>%
  left_join(fnet$TU_20230119_120304 %>% select(!SITE), by = "SUBJID") %>%
  left_join(fnet$PREV_20230119_120304 %>% select(!SITE), by = "SUBJID") %>%
  left_join(fnet$TR_20230119_120304 %>% select(!SITE), by = "SUBJID") %>%
  left_join(
    fnet$STAT_20230119_120304 %>%
      select(!SITE) %>%
      rename(ALIVE = PTSTAT),
    by = "SUBJID"
  )
## Add the data on which form of GVHD is present.
## The tables are first simplified and reshaped to wide format.
agvhd <- fnet$AGVHD_20230119_120304 %>%
  pivot_wider(names_from = AGVHDLOC,
              values_from = AGVHDST,
              names_prefix = "acute_") %>% # prefix tells the affected organs apart; both datasets have a Liver variable
  group_by(SUBJID) %>%
  mutate(across(everything(), function(x) {
    # A group with no observed value keeps a typed NA; plain max(na.rm = TRUE)
    # would warn on every such group and return -Inf for numeric columns.
    if (all(is.na(x))) x[1] else max(x, na.rm = TRUE)
  })) %>%
  ungroup() %>%
  distinct(SUBJID, .keep_all = TRUE) %>% # one row per patient
  select(!acute_NA)
## Warning: There were 240 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) max(x, na.rm = TRUE))`.
## ℹ In group 1: `SUBJID = "01001"`.
## Caused by warning in `max()`:
## ! no non-missing arguments, returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 239 remaining warnings.
# Chronic GVHD organ involvement in wide format, one row per patient
cgvhd <- fnet$CGVHD_20230119_120304 %>%
  pivot_wider(names_from = CGVHDLOC,
              values_from = DAMDEG,
              names_prefix = "chronic_") %>% # prefix tells the affected organs apart; both datasets have a Liver variable
  group_by(SUBJID) %>%
  mutate(across(everything(), function(x) {
    # A group with no observed value keeps a typed NA; plain max(na.rm = TRUE)
    # would warn on every such group and return -Inf for numeric columns.
    if (all(is.na(x))) x[1] else max(x, na.rm = TRUE)
  })) %>%
  ungroup() %>%
  distinct(SUBJID, .keep_all = TRUE) %>% # one row per patient
  select(-c(chronic_NA, TYPEOTH))
## Warning: There were 394 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) max(x, na.rm = TRUE))`.
## ℹ In group 1: `SUBJID = "01003"`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 393 remaining warnings.
# GVHD categories spread into one yes/no column per category
fgvhd <- pivot_wider(fnet$GVHD_20230119_120304,
                     names_from = GVHDCAT,
                     values_from = GVHDYN) %>%
  select(-GVHDOTHM) %>% # per the original author, this column holds only NA
  # keep rows that have a GVHD date or are flagged as cross syndrome
  filter(`Cross syndrome` == "yes" | !is.na(GVHDDTC))
Преобразуем типы переменных
# Convert column types of the patient-level table
common_df_patient <- common_df %>%
  mutate(
    # categorical variables
    across(c(SITE, SEX, ICD, TUTERM, LLTC, LLTN, PTC,
             PTN, PSOCC, PSOCN, TRTYPE, TRSOURCE,
             CONDTYPE, ALIVE, RELAPYN), as.factor),
    # dates: parsed from dd/mm/yyyy and stored back as dd.mm.yyyy strings
    across(c(BIRTHDTC, PRSTDTC, PRENDTC, TRDTC,
             LCDTC, DEATHDTC, RELAPDTC),
           function(d) format(as.Date(d, format = "%d/%m/%Y"), "%d.%m.%Y")),
    # numeric variables
    across(c(TIMGDOSE, HATGDOSE, TRNUM), as.numeric)
  ) %>%
  # drop several disease-classification columns (PTN and PSOCN are kept)
  select(!c(LLTC, PTC, PSOCC, ICD, TUTERM, LLTN))
Меняем ошибочную дату
# Overwrite the prophylaxis start date of patient 01095 with the corrected value
common_df_patient <- mutate(
  common_df_patient,
  PRSTDTC = if_else(condition = SUBJID == "01095",
                    true = "08.07.2021",
                    false = PRSTDTC)
)
Для текущего датафрейма единица наблюдения – 1 пациент
Добавим информацию про острую, хроническую и оверлап РТПХ
## Attach the GVHD records to the patient-level data
common_df_disease <- common_df_patient %>%
  left_join(select(fgvhd, !SITE), by = "SUBJID")
## Split into three data sets: acute GVHD, chronic GVHD and overlap syndrome;
## the acute and chronic parts also receive their organ-involvement columns
common_df_disease_acute <- common_df_disease %>%
  filter(`Acute GVHD` == "yes") %>%
  left_join(select(agvhd, !SITE), by = "SUBJID")
common_df_disease_chronic <- common_df_disease %>%
  filter(`Chronic GVHD` == "yes") %>%
  left_join(select(cgvhd, !SITE), by = "SUBJID")
common_df_disease_cross <- filter(common_df_disease, `Cross syndrome` == "yes")
## Recombine the three parts; the join keys are the columns of the data set
## as it was before the split (the right-hand side is evaluated before assignment)
common_df_disease <- common_df_disease_acute %>%
  full_join(common_df_disease_chronic, by = colnames(common_df_disease)) %>%
  full_join(common_df_disease_cross, by = colnames(common_df_disease))
Преобразуем типы переменных.
## Derive the GVHD type label, convert column types, fold the free-text
## "other" values into their parent columns and shorten long column names
common_df_disease <- common_df_disease %>%
  mutate(
    # TRIND is needed later as a key for joining the therapy data
    TRIND = as.factor(case_when(
      `Acute GVHD` == "yes" ~ "acute GVHD",
      `Chronic GVHD` == "yes" ~ "chronic GVHD",
      `Cross syndrome` == "yes" ~ "cross syndrome"
    )),
    across(c(AGVHDOCC, AGVHDGR, acute_Skin, acute_Liver,
             `acute_Upper gastrointestinal tract`,
             `acute_Lower gastrointestinal tract`,
             CGVHDOCC, PTSTAT, SEVTYPE, SEVGRADE,
             `chronic_Skin involvement, % lesion of body surface area`,
             `chronic_Sclerotic changes of the skin`,
             `chronic_Changes in oral cavity`,
             `chronic_Eyes`,
             `chronic_Gastrointestinal tract`, chronic_Liver, chronic_Lungs,
             `chronic_Lungs functional assessment`,
             `chronic_Joints and fascia`,
             `chronic_Sex organs`, GVHDMETH,
             `Acute GVHD`, `Chronic GVHD`, `Cross syndrome`), as.factor),
    GVHDAGE = as.numeric(GVHDAGE),
    GVHDDTC = format(as.Date(GVHDDTC, format = "%d/%m/%Y"), "%d.%m.%Y"),
    # move "other" conditioning into the common column (CONDOTH is dropped below)
    COND = as.factor(case_when(
      COND == "other" ~ CONDOTH,
      COND != "other" ~ COND
    )),
    # move "other" prophylaxis into the common column (SCHEMOTH is dropped below)
    PRSCHEM = case_when(
      PRSCHEM == "other" ~ SCHEMOTH,
      PRSCHEM != "other" ~ PRSCHEM
    ),
    # NOTE(review): any value other than NA or "yes" becomes NA here - confirm
    # that the source columns only ever hold "yes" or a missing value
    across(c(AGVHDOCC, CGVHDOCC), function(occ) case_when(
      is.na(occ) ~ "no",
      occ == "yes" ~ "yes"
    ))
  ) %>%
  rename( # shorter, syntactic names for a number of variables
    Acute_GVHD = `Acute GVHD`,
    Chronic_GVHD = `Chronic GVHD`,
    Cross_syndrome = `Cross syndrome`,
    acute_UGT = `acute_Upper gastrointestinal tract`,
    acute_LGT = `acute_Lower gastrointestinal tract`,
    chronic_Skin_perc = `chronic_Skin involvement, % lesion of body surface area`,
    chronic_Skin_scl = `chronic_Sclerotic changes of the skin`,
    chronic_oral = `chronic_Changes in oral cavity`,
    chronic_GT = `chronic_Gastrointestinal tract`,
    chronic_Lungs_func = `chronic_Lungs functional assessment`,
    chronic_Joints = `chronic_Joints and fascia`,
    chronic_Sex = `chronic_Sex organs`
  ) %>%
  select(!c(CONDOTH, Acute_GVHD, Chronic_GVHD,
            Cross_syndrome, SCHEMOTH))
Для нашего текущего датафрейма единица наблюдения – наличие одного из типов РТПХ у одного пациента.
Вновь разобьем датасет на три субдатасета по заболеваниям
# One sub-dataset per GVHD type, selected by the TRIND label
common_df_disease_acute <- filter(common_df_disease, TRIND == "acute GVHD")
common_df_disease_chronic <- filter(common_df_disease, TRIND == "chronic GVHD")
common_df_disease_cross <- filter(common_df_disease, TRIND == "cross syndrome")
Добавим информацию о полученной терапии
# Attach the therapy records (one row per drug course) to the disease-level data
common_df_treatment <- common_df_disease %>%
  left_join(fnet$CM_20230119_120304 %>% select(!SITE),
            by = c("SUBJID", "TRIND")) %>%
  select(-c(REPDRUG, DRUGC, ATCC, INGR)) %>% # drop duplicated and secondary columns
  mutate(
    across(c(TRSTDTC, TRENDTC),
           ~ format(as.Date(.x, format = "%d/%m/%Y"), "%d.%m.%Y")),
    across(c(GVHDTRYN, DRUGN, ATCN, LOT, TRONG, TRRESP, RESPEV), as.factor),
    # New column: steroid resistance, defined only for steroid drug classes.
    # The previous condition `TRRESP != "no data" | TRRESP != "not estimated"`
    # was always TRUE, so unassessed responses were counted as "no resistance".
    STERRES = case_when(
      !ATCN %in% c("GLUCOCORTICOIDS",
                   "CORTICOSTEROIDS, POTENT (GROUP III)",
                   "CORTICOSTEROIDS FOR SYSTEMIC USE") ~ "no data",
      TRRESP %in% c("no response", "progression") ~ "resistance",
      is.na(TRRESP) | TRRESP %in% c("no data", "not estimated") ~ "no data",
      TRUE ~ "no resistance"
    ) %>% as.factor(),
    # NOTE(review): any value other than NA or "yes" becomes NA here - confirm
    # that the source columns only ever hold "yes" or a missing value
    across(c(GVHDTRYN, TRONG), ~ case_when(
      is.na(.x) ~ "no",
      .x == "yes" ~ "yes"
    ))
  )
Добавим новые переменные: длительность профилактики, длительность лечения, интервал от пересадки КМ до установления диагноза РТПХ, интервал от пересадки КМ до начала профилактики, интервал от конца профилактики до начала лечения.
#View(common_df_treatment_chronic)
# Whole days elapsed from `from` to `to`; both are "dd.mm.yyyy" strings
days_between <- function(from, to) {
  elapsed <- as.Date(to, format = "%d.%m.%Y") - as.Date(from, format = "%d.%m.%Y")
  as.numeric(as.character(elapsed))
}
common_df_treatment <- common_df_treatment %>%
  mutate(
    TDINTER = days_between(TRDTC, GVHDDTC),   # transplantation -> GVHD diagnosis
    TPINTER = days_between(TRDTC, PRSTDTC),   # transplantation -> prophylaxis start
    # NOTE(review): computed as prophylaxis end minus treatment start, i.e. it is
    # positive when treatment began before prophylaxis ended - confirm the sign
    PTINTER = days_between(TRSTDTC, PRENDTC),
    PRINTER = days_between(PRSTDTC, PRENDTC), # duration of prophylaxis
    # duration of treatment; LCDTC is used when no treatment end date is given
    TRINTER = if_else(is.na(TRENDTC),
                      days_between(TRSTDTC, LCDTC),
                      days_between(TRSTDTC, TRENDTC))
  )
# common_df_treatment_chronic <- common_df_treatment_chronic %>%
# filter(TPINTER>=3 & TPINTER < 5) # Удаляем людей, у которых профилактика началась ранее чем на 3-й день после трансплантации, и позднее, чем на 5-й.
### Experimental: flag the drug class of every therapy record
# Largest numeric value in `x` (factor or character); NA when none is numeric.
max_numeric <- function(x) {
  num <- suppressWarnings(as.numeric(as.character(x))) # non-numeric levels -> NA
  if (all(is.na(num))) NA_real_ else max(num, na.rm = TRUE)
}
test_drive <- common_df_treatment %>%
  group_by(SUBJID, TRIND) %>%
  mutate(
    # Highest line of therapy per patient and GVHD type. The previous
    # `max(is.numeric(LOT))` evaluated to 0 for every row.
    # NOTE(review): LOT is not part of the final selection below - confirm
    # whether it should be kept in the result.
    LOT = max_numeric(LOT),
    GLUK = if_else(ATCN %in% c("GLUCOCORTICOIDS", "CORTICOSTEROIDS, DERMATOLOGICAL PREPARATIONS",
                               "CORTICOSTEROIDS FOR SYSTEMIC USE", "CORTICOSTEROIDS, POTENT (GROUP III)"), 1, 0),
    JAK = if_else(ATCN %in% c("JANUS-ASSOCIATED KINASE (JAK) INHIBITORS"), 1, 0),
    CALIN = if_else(ATCN %in% c("CALCINEURIN INHIBITORS"), 1, 0),
    # the NA entry makes records without a drug class count as OTHER
    OTHER = if_else(ATCN %in% c("OTHER IMMUNOSUPPRESSANTS",
                                "SELECTIVE IMMUNOSUPPRESSANTS",
                                "INTERLEUKINS",
                                "BCR-ABL TYROSINE KINASE INHIBITORS",
                                "INTERLEUKIN INHIBITORS",
                                "OTHER IMMUNOSTIMULANTS",
                                "TUMOR NECROSIS FACTOR ALPHA (TNF-) INHIBITORS",
                                "IMIDAZOLE DERIVATIVES",
                                "BRUTON'S TYROSINE KINASE (BTK) INHIBITORS",
                                NA), 1, 0)
  ) %>%
  ungroup() %>% # do not leak the grouping into later steps
  # the grouping keys are listed explicitly instead of being added implicitly
  select(SUBJID, TRIND, ATCN, GLUK, JAK, CALIN, OTHER)
## Adding missing grouping variables: `SUBJID`, `TRIND`
Разобьем полученные данные на три блока по типу заболевания (опять)
# One treatment-level sub-dataset per GVHD type, selected by the TRIND label
common_df_treatment_acute <- filter(common_df_treatment, TRIND == "acute GVHD")
common_df_treatment_chronic <- filter(common_df_treatment, TRIND == "chronic GVHD")
common_df_treatment_cross <- filter(common_df_treatment, TRIND == "cross syndrome")
# Print the structure (column names and types) of the treatment-level data set
str(common_df_treatment)
## 'data.frame': 650 obs. of 61 variables:
## $ SITE : Factor w/ 4 levels "01","02","03",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SUBJID : chr "01001" "01002" "01003" "01005" ...
## $ BIRTHDTC : chr "11.01.1990" "05.05.1998" "30.09.1982" "28.09.1987" ...
## $ SEX : Factor w/ 2 levels "female","male": 1 2 1 1 2 1 2 2 2 1 ...
## $ PTN : Factor w/ 21 levels "ACUTE LYMPHOCYTIC LEUKAEMIA",..: 1 1 1 1 1 2 1 1 1 2 ...
## $ PSOCN : Factor w/ 2 levels "BLOOD AND LYMPHATIC SYSTEM DISORDERS",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ PRSTDTC : chr "07.09.2020" "21.04.2020" "08.05.2020" "20.02.2020" ...
## $ PRSCHEM : chr "PT-Cy(+3,+5)+CSA+MMF45" "TCRaB-CD19" "hATG+CSA+MTX+MMF30" "PT-Cy (+3,+4)+CSA+MMF30" ...
## $ TIMGDOSE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ HATGDOSE : num 0 0 40 0 0 0 0 0 0 0 ...
## $ PRENDTC : chr "01.06.2021" "20.05.2020" "17.05.2021" "25.10.2020" ...
## $ TRNUM : num 1 1 2 1 1 1 1 1 1 1 ...
## $ TRTYPE : Factor w/ 4 levels "Haplo","MMUD",..: 2 1 3 1 1 1 4 4 4 1 ...
## $ TRDTC : chr "04.09.2020" "22.04.2020" "12.05.2020" "17.02.2020" ...
## $ TRSOURCE : Factor w/ 2 levels "BM","BSC": 2 2 2 2 2 2 2 2 2 2 ...
## $ COND : Factor w/ 55 levels "Bu12+Cy120","CY50",..: 42 30 45 28 42 29 20 20 20 19 ...
## $ CONDTYPE : Factor w/ 2 levels "MAC","RIC": 2 1 2 2 2 2 2 2 2 2 ...
## $ ALIVE : Factor w/ 3 levels "alive","died",..: 1 2 2 1 2 1 2 2 2 1 ...
## $ LCDTC : chr "28.09.2022" NA NA "25.10.2022" ...
## $ DEATHDTC : chr NA "06.04.2021" "17.05.2021" NA ...
## $ RELAPYN : Factor w/ 3 levels "no","unknown",..: 1 3 1 1 1 1 1 1 1 1 ...
## $ RELAPDTC : chr NA "25.11.2020" NA NA ...
## $ GVHDDTC : chr "26.10.2020" "17.07.2020" "26.06.2020" "06.03.2020" ...
## $ GVHDAGE : num 30 22 37 32 20 45 35 35 35 42 ...
## $ GVHDMETH : Factor w/ 5 levels "Glucksberg","IBMTR",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ AGVHDOCC : chr "yes" "yes" "yes" "yes" ...
## $ AGVHDGR : Factor w/ 4 levels "1","2","3","4": 1 1 3 1 3 1 3 3 3 1 ...
## $ acute_Skin : Factor w/ 5 levels "0","1","2","3",..: 3 2 4 3 2 2 3 3 3 2 ...
## $ acute_Liver : Factor w/ 5 levels "0","1","2","3",..: 1 1 3 1 4 1 3 3 3 1 ...
## $ acute_UGT : Factor w/ 5 levels "0","1","2","3",..: 1 1 1 1 1 1 4 4 4 1 ...
## $ acute_LGT : Factor w/ 5 levels "0","1","2","3",..: 1 1 2 1 1 1 4 4 4 1 ...
## $ CGVHDOCC : chr "no" "no" "no" "no" ...
## $ PTSTAT : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ SEVTYPE : Factor w/ 3 levels "NIH2005","NIH2014",..: NA NA NA NA NA NA NA NA NA NA ...
## $ SEVGRADE : Factor w/ 3 levels "mild","moderate",..: NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Skin_perc : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Skin_scl : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_oral : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Eyes : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_GT : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Liver : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Lungs : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Lungs_func: Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Joints : Factor w/ 3 levels "0","1","2": NA NA NA NA NA NA NA NA NA NA ...
## $ chronic_Sex : Factor w/ 4 levels "0","1","2","3": NA NA NA NA NA NA NA NA NA NA ...
## $ TRIND : chr "acute GVHD" "acute GVHD" "acute GVHD" "acute GVHD" ...
## $ GVHDTRYN : chr "yes" "no" "yes" "yes" ...
## $ DRUGN : Factor w/ 27 levels "ACTIVATED T-LYMPHOCYTES",..: 18 NA 14 14 14 NA 18 13 20 NA ...
## $ ATCN : Factor w/ 15 levels "BCR-ABL TYROSINE KINASE INHIBITORS",..: 7 NA 7 7 7 NA 7 13 11 NA ...
## $ LOT : Factor w/ 6 levels "1","2","3","4",..: 1 NA 1 1 1 NA 1 2 6 NA ...
## $ TRSTDTC : chr "11.11.2020" NA "26.06.2020" "06.03.2020" ...
## $ TRENDTC : chr "09.03.2021" NA "04.09.2020" "28.05.2020" ...
## $ TRONG : chr "no" "no" "no" "no" ...
## $ TRRESP : Factor w/ 6 levels "complete response",..: 1 NA 2 1 1 NA 6 5 1 NA ...
## $ RESPEV : Factor w/ 3 levels "MAGIC","NIH2014",..: 1 NA 1 2 1 NA 2 2 2 NA ...
## $ STERRES : Factor w/ 3 levels "no data","no resistance",..: 2 1 3 2 2 1 3 1 1 1 ...
## $ TDINTER : num 52 86 45 18 46 109 47 47 47 33 ...
## $ TPINTER : num 3 -1 -4 3 3 0 -5 -5 -5 2 ...
## $ PTINTER : num 202 NA 325 233 103 NA 50 47 21 NA ...
## $ PRINTER : num 267 29 374 248 146 29 104 104 104 NA ...
## $ TRINTER : num 118 NA 70 83 70 NA 50 14 21 NA ...
# Print a per-column summary of the treatment-level data set
summary(common_df_treatment)
## SITE SUBJID BIRTHDTC SEX
## 01:200 Length:650 Length:650 female:304
## 02:383 Class :character Class :character male :346
## 03: 1 Mode :character Mode :character
## 04: 66
##
##
##
## PTN
## ACUTE MYELOID LEUKAEMIA :303
## ACUTE LYMPHOCYTIC LEUKAEMIA:145
## APLASTIC ANAEMIA : 40
## CHRONIC MYELOID LEUKAEMIA : 36
## MYELODYSPLASTIC SYNDROME : 28
## PRIMARY MYELOFIBROSIS : 18
## (Other) : 80
## PSOCN
## BLOOD AND LYMPHATIC SYSTEM DISORDERS : 40
## NEOPLASMS BENIGN, MALIGNANT AND UNSPECIFIED (INCL CYSTS AND POLYPS):610
##
##
##
##
##
## PRSTDTC PRSCHEM TIMGDOSE HATGDOSE
## Length:650 Length:650 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.3023 Mean : 4.108
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :40.0000 Max. :120.000
##
## PRENDTC TRNUM TRTYPE TRDTC TRSOURCE
## Length:650 Min. :1.000 Haplo:315 Length:650 BM :108
## Class :character 1st Qu.:1.000 MMUD :110 Class :character BSC:542
## Mode :character Median :1.000 MRD :106 Mode :character
## Mean :1.182 MUD :119
## 3rd Qu.:1.000
## Max. :5.000
##
## COND CONDTYPE ALIVE LCDTC
## Flu180+Bu8 :159 MAC:130 alive :465 Length:650
## Flu180+Bu10: 71 RIC:520 died :177 Class :character
## Flu180+Bu12: 59 unknown: 8 Mode :character
## Flu150+Bu8 : 54
## Flu180+Bu14: 39
## Flu90+Benda: 28
## (Other) :240
## DEATHDTC RELAPYN RELAPDTC GVHDDTC
## Length:650 no :514 Length:650 Length:650
## Class :character unknown: 11 Class :character Class :character
## Mode :character yes :125 Mode :character Mode :character
##
##
##
##
## GVHDAGE GVHDMETH AGVHDOCC AGVHDGR acute_Skin
## Min. :12.00 Glucksberg :188 Length:650 1 :102 0 : 73
## 1st Qu.:24.00 IBMTR : 1 Class :character 2 : 84 1 : 73
## Median :36.00 MAGIC :146 Mode :character 3 : 92 2 : 75
## Mean :36.24 NIH2014 :279 4 : 60 3 :100
## 3rd Qu.:46.00 subjectively: 9 NA's:312 4 : 17
## Max. :76.00 NA's : 27 NA's:312
## NA's :33
## acute_Liver acute_UGT acute_LGT CGVHDOCC PTSTAT
## 0 :249 0 :312 0 :222 Length:650 0 : 41
## 1 : 14 1 : 17 1 : 20 Class :character 1 : 83
## 2 : 14 2 : 2 2 : 16 Mode :character 2 : 76
## 3 : 32 3 : 6 3 : 43 3 : 85
## 4 : 29 4 : 1 4 : 37 NA's:365
## NA's:312 NA's:312 NA's:312
##
## SEVTYPE SEVGRADE chronic_Skin_perc chronic_Skin_scl
## NIH2005 : 1 mild : 49 0 : 80 0 :272
## NIH2014 :283 moderate: 91 1 : 83 1 : 8
## subjectively: 1 severe :145 2 : 78 2 : 3
## NA's :365 NA's :365 3 : 44 3 : 2
## NA's:365 NA's:365
##
##
## chronic_oral chronic_Eyes chronic_GT chronic_Liver chronic_Lungs
## 0 :115 0 :154 0 :244 0 :168 0 :246
## 1 : 99 1 : 77 1 : 8 1 : 17 1 : 13
## 2 : 57 2 : 41 2 : 14 2 : 46 2 : 13
## 3 : 14 3 : 13 3 : 19 3 : 54 3 : 13
## NA's:365 NA's:365 NA's:365 NA's:365 NA's:365
##
##
## chronic_Lungs_func chronic_Joints chronic_Sex TRIND
## 0 :254 0 :273 0 :257 Length:650
## 1 : 12 1 : 8 1 : 16 Class :character
## 2 : 7 2 : 4 2 : 4 Mode :character
## 3 : 12 NA's:365 3 : 8
## NA's:365 NA's:365
##
##
## GVHDTRYN DRUGN
## Length:650 METHYLPREDNISOLONE:138
## Class :character RUXOLITINIB :105
## Mode :character PREDNISOLONE : 61
## TACROLIMUS : 43
## CICLOSPORIN : 29
## (Other) : 86
## NA's :188
## ATCN LOT
## GLUCOCORTICOIDS :201 1 :305
## JANUS-ASSOCIATED KINASE (JAK) INHIBITORS :105 2 :106
## CALCINEURIN INHIBITORS : 72 3 : 38
## SELECTIVE IMMUNOSUPPRESSANTS : 38 4 : 9
## TUMOR NECROSIS FACTOR ALPHA (TNF-) INHIBITORS: 22 5 : 3
## (Other) : 24 A : 2
## NA's :188 NA's:187
## TRSTDTC TRENDTC TRONG
## Length:650 Length:650 Length:650
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## TRRESP RESPEV STERRES TDINTER
## complete response:178 MAGIC : 34 no data :446 Min. : 3.0
## no response : 99 NIH2014 :407 no resistance:142 1st Qu.: 37.0
## not estimated : 14 subjectively: 6 resistance : 62 Median : 89.0
## other : 1 NA's :203 Mean :119.5
## partial response :153 3rd Qu.:169.0
## progression : 18 Max. :645.0
## NA's :187 NA's :33
## TPINTER PTINTER PRINTER TRINTER
## Min. : -8.000 Min. :-635.00 Min. : 1 Min. : -2.0
## 1st Qu.: -1.000 1st Qu.: -81.00 1st Qu.: 72 1st Qu.: 33.0
## Median : 3.000 Median : -17.00 Median :120 Median : 70.0
## Mean : 1.654 Mean : -19.85 Mean :138 Mean :136.0
## 3rd Qu.: 3.000 3rd Qu.: 50.00 3rd Qu.:192 3rd Qu.:187.8
## Max. :125.000 Max. : 509.00 Max. :600 Max. :810.0
## NA's :3 NA's :403 NA's :289 NA's :192
Подсчитаем описательные статистики
Нам интересно посмотреть на следующие переменные. Факторные: SEX (пол), TUTERM (диагноз), PSOCN (группа заболеваний), TIMGDOSE? (есть-нет), HATGDOSE (есть-нет), TRTYPE (тип донора), TRSOURCE (источник КМ), ALIVE (выжил ли пациент), RELAPYN (был ли рецидив), PTSTAT (состояние пациента при обследовании), SEVGRADE? (степень тяжести), все, что начинается с chronic? (проявления заболевания), STERRES (развилась ли резистентность к стероидам). Числовые: TRNUM (число пересадок), GVHDAGE (возраст начала заболевания), все, что заканчивается на INTER (временные интервалы).
# Frequency table for one categorical column.
#
# For every level of the column (missing values are counted as "no_data")
# the function reports the number of observations, their share among all
# observations and the Wilson confidence interval for that share.
#
# df      - data frame that holds the column
# factorr - name of the column, given as a character string
#
# Returns a table with the columns variant, variable, number,
# proportionIntoGroup and proportionCI.
catStat <- function(df, factorr) {
  counts <- df %>%
    # all_of() selects by a name held in a variable; embracing a string with
    # {{ }} relied on deprecated tidyselect behaviour
    select(variant = all_of(factorr)) %>%
    mutate(variant = as.character(variant) %>% replace_na("no_data") %>% as.factor()) %>%
    count(variant) %>%
    rename(number = n)
  # Wilson interval computed once; columns 2 and 3 are the lower and upper bounds
  wilson <- binconf(counts$number, sum(counts$number), method = "wilson")
  counts %>%
    mutate(proportionIntoGroup = round(number / sum(number), 3),
           proportionCI = paste(round(wilson[, 2], 3),
                                round(wilson[, 3], 3),
                                sep = "-"),
           variable = factorr) %>%
    relocate(variable, .after = 1)
}
### Standard error of the mean.
# x - numeric vector; missing values are ignored.
# The sample size counts only non-missing values, matching sd(na.rm = TRUE);
# dividing by length(x) would understate the error when NAs are present.
se <- function(x) {
  sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
}
### Descriptive statistics for numeric variables.
# Named list of summary functions for use with across(); every function
# returns a character string so that all results can share one column.
statistics <- list(
  Counts = function(x) as.character(length(x)),
  NAs = function(x) as.character(sum(is.na(x))),
  Mean = function(x) as.character(round(mean(x, na.rm = TRUE), 3)),
  SD = function(x) as.character(round(sd(x, na.rm = TRUE), 3)),
  # normal-approximation 95% interval: mean -/+ 1.96 standard errors
  CI95 = function(x) {
    centre <- mean(x, na.rm = TRUE)
    half_width <- 1.96 * se(x)
    paste(round(centre - half_width, 3), round(centre + half_width, 3), sep = "-")
  },
  Median = function(x) as.character(round(median(x, na.rm = TRUE), 3)),
  # first and third quartiles
  Quantiles = function(x) {
    quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
    paste(round(quartiles[[1]], 3), round(quartiles[[2]], 3), sep = "-")
  },
  Iqr = function(x) as.character(round(IQR(x, na.rm = TRUE), 3)),
  Min = function(x) as.character(round(min(x, na.rm = TRUE), 3)),
  Max = function(x) as.character(round(max(x, na.rm = TRUE), 3))
)
По-хорошему весь нижележащий код надо обернуть в функции
# Frequency table for every factor column of the patient-level data
factor_patient_table <- common_df_patient %>%
  select(where(is.factor)) %>%
  colnames() %>%
  lapply(function(col_name) as.data.frame(catStat(common_df_patient, col_name))) %>%
  do.call(rbind, .) %>%
  # reorder the columns and give the two proportion columns their final names
  select(variable, variant, number,
         group_proportion = proportionIntoGroup,
         proportion_CI = proportionCI)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(x)
##
## # Now:
## data %>% select(all_of(x))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Descriptive statistics for every numeric column of the patient-level data:
# one row per statistic, one column per variable
numeric_patient_table <- common_df_patient %>%
  # where() wraps the predicate; bare predicates are deprecated in tidyselect
  summarise(across(where(is.numeric), statistics)) %>%
  stack() %>%
  rename(value = values) %>%
  # across() names its results "<variable>_<statistic>"
  separate(ind, sep = "_", into = c("variable", "statistic")) %>%
  pivot_wider(
    names_from = variable,
    values_from = value
  )
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(is.numeric, statistics)`.
## Caused by warning:
## ! Use of bare predicate functions was deprecated in tidyselect 1.1.0.
## ℹ Please use wrap predicates in `where()` instead.
## # Was:
## data %>% select(is.numeric)
##
## # Now:
## data %>% select(where(is.numeric))
# Helpers are defined in this chunk so that it is self-contained.

# Frequency table (count, share, Wilson CI) for every factor column of `df`.
factor_summary_table <- function(df) {
  df %>%
    select(where(is.factor)) %>%
    colnames() %>%
    lapply(function(col_name) as.data.frame(catStat(df, col_name))) %>%
    do.call(rbind, .) %>%
    select(variable, variant, number, proportionIntoGroup, proportionCI) %>%
    rename(group_proportion = proportionIntoGroup,
           proportion_CI = proportionCI)
}

# Descriptive statistics for every numeric column of `df`:
# one row per statistic, one column per variable.
numeric_summary_table <- function(df) {
  df %>%
    # where() wraps the predicate; bare predicates are deprecated in tidyselect
    summarise(across(where(is.numeric), statistics)) %>%
    stack() %>%
    rename(value = values) %>%
    # across() names its results "<variable>_<statistic>"
    separate(ind, sep = "_", into = c("variable", "statistic")) %>%
    pivot_wider(names_from = variable, values_from = value)
}

# Disease-level summaries, separately for each GVHD type
factor_disease_acute_table <- factor_summary_table(
  common_df_disease %>% filter(TRIND == "acute GVHD"))
numeric_disease_acute_table <- numeric_summary_table(
  common_df_disease %>% filter(TRIND == "acute GVHD"))
factor_disease_chronic_table <- factor_summary_table(
  common_df_disease %>% filter(TRIND == "chronic GVHD"))
numeric_disease_chronic_table <- numeric_summary_table(
  common_df_disease %>% filter(TRIND == "chronic GVHD"))
factor_disease_cross_table <- factor_summary_table(
  common_df_disease %>% filter(TRIND == "cross syndrome"))
numeric_disease_cross_table <- numeric_summary_table(
  common_df_disease %>% filter(TRIND == "cross syndrome"))
## Warning: There were 2 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `across(is.numeric, statistics)`.
## Caused by warning in `min()`:
## ! no non-missing arguments to min; returning Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Helpers are defined in this chunk so that it is self-contained.

# Frequency table (count, share, Wilson CI) for every factor column of `df`.
factor_summary_table <- function(df) {
  df %>%
    select(where(is.factor)) %>%
    colnames() %>%
    lapply(function(col_name) as.data.frame(catStat(df, col_name))) %>%
    do.call(rbind, .) %>%
    select(variable, variant, number, proportionIntoGroup, proportionCI) %>%
    rename(group_proportion = proportionIntoGroup,
           proportion_CI = proportionCI)
}

# Descriptive statistics for every numeric column of `df`:
# one row per statistic, one column per variable.
numeric_summary_table <- function(df) {
  df %>%
    # where() wraps the predicate; bare predicates are deprecated in tidyselect
    summarise(across(where(is.numeric), statistics)) %>%
    stack() %>%
    rename(value = values) %>%
    # across() names its results "<variable>_<statistic>"
    separate(ind, sep = "_", into = c("variable", "statistic")) %>%
    pivot_wider(names_from = variable, values_from = value)
}

# Treatment-level summaries, separately for each GVHD type
factor_treatment_acute_table <- factor_summary_table(
  common_df_treatment %>% filter(TRIND == "acute GVHD"))
numeric_treatment_acute_table <- numeric_summary_table(
  common_df_treatment %>% filter(TRIND == "acute GVHD"))
factor_treatment_chronic_table <- factor_summary_table(
  common_df_treatment %>% filter(TRIND == "chronic GVHD"))
numeric_treatment_chronic_table <- numeric_summary_table(
  common_df_treatment %>% filter(TRIND == "chronic GVHD"))
factor_treatment_cross_table <- factor_summary_table(
  common_df_treatment %>% filter(TRIND == "cross syndrome"))
numeric_treatment_cross_table <- numeric_summary_table(
  common_df_treatment %>% filter(TRIND == "cross syndrome"))
## Warning: There were 4 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `across(is.numeric, statistics)`.
## Caused by warning in `min()`:
## ! no non-missing arguments to min; returning Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# Bar chart of observation counts for each level of one categorical column.
#
# df       - data frame with the data
# variable - name of the column to plot, as a character string
# filt     - fill colour of the bars
#
# Returns a ggplot object.
bar_custom <- function(df, variable, filt = "green") {
  ggplot(df) +
    # .data[[variable]] looks the column up inside the plot data instead of
    # injecting an external vector into the aesthetic
    geom_bar(aes(x = .data[[variable]]), fill = filt, colour = "black") +
    labs(x = glue("Градации переменной {variable}"),
         y = "Количество",
         title = glue("Частоты единиц наблюдения по категориям\nпеременной {variable}")) +
    theme_bw() +
    theme(axis.text.x = element_text(size = 14, angle = 25),
          axis.text.y = element_text(size = 14),
          axis.title.x = element_text(size = 18),
          axis.title.y = element_text(size = 18),
          plot.title = element_text(size = 22, hjust = 0.5))
}
# Count plot of two categorical columns: point size shows how many
# observations fall into each combination of levels.
#
# df        - data frame with the data
# variable1 - name of the column for the x axis, as a character string
# variable2 - name of the column for the y axis, as a character string
#
# Returns a ggplot object.
factor_count <- function(df, variable1, variable2) {
  ggplot(df) +
    # .data[[...]] looks the columns up inside the plot data instead of
    # injecting external vectors into the aesthetics
    geom_count(aes(x = .data[[variable1]], y = .data[[variable2]])) +
    labs(x = glue("Градации переменной {variable1}"),
         y = glue("Градации переменной {variable2}"),
         title = glue("Количество единиц наблюдения по категориям\nпеременной {variable1} и {variable2}")) +
    theme_bw() +
    theme(axis.text.x = element_text(size = 14, angle = 25),
          axis.text.y = element_text(size = 14),
          axis.title.x = element_text(size = 18),
          axis.title.y = element_text(size = 18),
          plot.title = element_text(size = 22, hjust = 0.5))
}
# Подумать насчет аналога каунтов для процентов
# Общий график для дат
# Section marker in the output: bar plots for patient-level variables
print("Барплоты по пациентам")
## [1] "Барплоты по пациентам"
# One bar chart per selected patient-level factor
common_df_patient %>%
  select(SITE, SEX, PTN, PSOCN,
         TRTYPE, TRSOURCE, CONDTYPE,
         RELAPYN, ALIVE) %>%
  colnames() %>%
  lapply(function(col_name) bar_custom(common_df_patient, col_name))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
# Section marker in the output: bar plots for disease-level variables
print("Барплоты по заболеванию")
## [1] "Барплоты по заболеванию"
# One red bar chart per selected chronic GVHD variable
common_df_disease_chronic %>%
  select(PTSTAT, SEVGRADE) %>%
  colnames() %>%
  lapply(function(col_name) bar_custom(common_df_disease_chronic, col_name, "red"))
## [[1]]
##
## [[2]]
# Section marker in the output: bar plots for therapy variables
print("Барплоты по терапии")
## [1] "Барплоты по терапии"
# One blue bar chart per selected therapy variable (chronic GVHD records)
common_df_treatment_chronic %>%
  select(LOT, STERRES, TRRESP, ATCN) %>%
  colnames() %>%
  lapply(function(col_name) bar_custom(common_df_treatment_chronic, col_name, "blue"))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# Section marker in the output: count plots against survival, patient level
print("Каунтплоты по выживаемости -- пациенты")
## [1] "Каунтплоты по выживаемости -- пациенты"
# Count plot of every selected patient-level factor against ALIVE
common_df_patient %>%
  select(SITE, SEX, PTN, PSOCN,
         TRTYPE, TRSOURCE, CONDTYPE,
         RELAPYN) %>%
  colnames() %>%
  lapply(function(col_name) factor_count(common_df_patient, col_name, "ALIVE"))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
# Section marker in the output: count plots against survival, disease level
print("Каунтплоты по выживаемости -- заболевание")
## [1] "Каунтплоты по выживаемости -- заболевание"
# Count plot of every selected chronic GVHD variable against ALIVE
common_df_disease_chronic %>%
  select(PTSTAT, SEVGRADE) %>%
  colnames() %>%
  lapply(function(col_name) factor_count(common_df_disease_chronic, col_name, "ALIVE"))
## [[1]]
##
## [[2]]
# Section marker in the output: count plots against survival, therapy level
print("Каунтплоты по выживаемости -- терапия")
## [1] "Каунтплоты по выживаемости -- терапия"
# Count plot of every selected therapy variable against ALIVE
common_df_treatment_chronic %>%
  select(LOT, STERRES, TRRESP, ATCN) %>%
  colnames() %>%
  lapply(function(col_name) factor_count(common_df_treatment_chronic, col_name, "ALIVE"))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# Section marker in the output: count plots, acute vs chronic, disease level
print("Каунтплоты по ОvsХ -- заболевание")
## [1] "Каунтплоты по ОvsХ -- заболевание"
# lapply(common_df_disease %>%
# filter(#уточнить дату окончания исследования#)
# select(SITE, SEX, PTN, PSOCN,
# TRTYPE, TRSOURCE, CONDTYPE,
# RELAPYN, PTSTAT, SEVGRADE))
# Section marker in the output: steroid resistance, therapy level
print("Резистентность к стероидам -- терапия")
## [1] "Резистентность к стероидам -- терапия"
# Count plot of every selected variable against steroid resistance (STERRES)
common_df_treatment_chronic %>%
  select(SITE, SEX, PTN, PSOCN,
         TRTYPE, TRSOURCE, CONDTYPE,
         RELAPYN, ALIVE, PTSTAT, SEVGRADE,
         LOT, TRRESP, ATCN) %>%
  colnames() %>%
  lapply(function(col_name) factor_count(common_df_treatment_chronic, col_name, "STERRES"))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
# ggplot(common_df_disease_chronic)+
# geom_bar(aes(x = pull(common_df_disease_chronic["PTSTAT"])), fill="green", colour="black")+
# # labs(x = glue("Градации переменной {variable}"),
# # y = "Количество",
# # title = glue("Частоты единиц наблюдения по категориям\nпеременной {variable}"))+
# theme_bw()+
# theme(axis.text.x = element_text(size=14, angle = 25),
# axis.text.y = element_text(size=14),
# axis.title.x = element_text(size=18),
# axis.title.y = element_text(size=18),
# plot.title = element_text(size=22, hjust=0.5))
Боксплоты для продолжительности временных интервалов
# TODO: polish the axis labels and plot titles.
#
# Build a box plot for a single column of a data frame.
#
# Arguments:
#   df  data frame that contains the column to plot
#   y   name of the column to plot, as a length-one character string
#
# Returns a ggplot object: one green box plot of `df[[y]]`, y axis labelled
# "Количество дней", titled with the column name.
boxpCreator_inter <- function(df, y) {
  # Fail early with a clear message rather than an opaque subsetting error.
  stopifnot(is.character(y), length(y) == 1L, y %in% names(df))

  ggplot(df) +
    # `.data[[y]]` resolves the column inside the plot data instead of
    # embedding a detached copy of the vector in the aesthetic mapping.
    geom_boxplot(
      aes(y = .data[[y]]),
      fill = "green",
      color = "black"
    ) +
    # The title carries the column name so that plots produced in a loop
    # over several columns can be told apart.
    labs(y = "Количество дней", title = y) +
    theme_bw() +
    theme(
      axis.text.y = element_text(size = 16),
      axis.title.x = element_text(size = 19),
      axis.title.y = element_text(size = 19),
      plot.title = element_text(size = 20, hjust = 0.5),
      legend.title = element_text(size = 21),
      legend.text = element_text(size = 18)
    )
}
# One box plot per column of common_df_treatment_chronic whose name ends
# with "INTER"; the list of plots is auto-printed.
lapply(
  names(select(common_df_treatment_chronic, ends_with("INTER"))),
  function(inter_col) {
    boxpCreator_inter(common_df_treatment_chronic, inter_col)
  }
)
## [[1]]
## Warning: Removed 6 rows containing non-finite values (`stat_boxplot()`).
##
## [[2]]
##
## [[3]]
## Warning: Removed 184 rows containing non-finite values (`stat_boxplot()`).
##
## [[4]]
## Warning: Removed 142 rows containing non-finite values (`stat_boxplot()`).
##
## [[5]]
## Warning: Removed 77 rows containing non-finite values (`stat_boxplot()`).
Другой вариант табличек
# Summary table of the selected patient-level columns, with one table column
# per level of ALIVE.
# Disabled step: filter(ALIVE != "unknown") before summarising.
tbl_summary(
  select(
    common_df_patient,
    SITE, SEX, PTN, PSOCN,
    TRTYPE, TRSOURCE, CONDTYPE,
    RELAPYN, ALIVE
  ),
  by = ALIVE
) #%>%
| Characteristic | alive, N = 252<sup>1</sup> | died, N = 86<sup>1</sup> | unknown, N = 5<sup>1</sup> |
|---|---|---|---|
| SITE | |||
| 01 | 77 (31%) | 35 (41%) | 5 (100%) |
| 02 | 150 (60%) | 40 (47%) | 0 (0%) |
| 03 | 1 (0.4%) | 0 (0%) | 0 (0%) |
| 04 | 24 (9.5%) | 11 (13%) | 0 (0%) |
| SEX | |||
| female | 137 (54%) | 38 (44%) | 2 (40%) |
| male | 115 (46%) | 48 (56%) | 3 (60%) |
| PTN | |||
| ACUTE LYMPHOCYTIC LEUKAEMIA | 54 (21%) | 26 (30%) | 5 (100%) |
| ACUTE MYELOID LEUKAEMIA | 114 (45%) | 38 (44%) | 0 (0%) |
| ACUTE MYELOMONOCYTIC LEUKAEMIA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| ACUTE PROMYELOCYTIC LEUKAEMIA | 1 (0.4%) | 1 (1.2%) | 0 (0%) |
| ANAPLASTIC LARGE-CELL LYMPHOMA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| ANGIOIMMUNOBLASTIC T-CELL LYMPHOMA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| APLASTIC ANAEMIA | 18 (7.1%) | 6 (7.0%) | 0 (0%) |
| BURKITT'S LYMPHOMA | 0 (0%) | 1 (1.2%) | 0 (0%) |
| CHRONIC LYMPHOCYTIC LEUKAEMIA | 6 (2.4%) | 0 (0%) | 0 (0%) |
| CHRONIC MYELOID LEUKAEMIA | 14 (5.6%) | 3 (3.5%) | 0 (0%) |
| CHRONIC MYELOMONOCYTIC LEUKAEMIA | 2 (0.8%) | 0 (0%) | 0 (0%) |
| DIFFUSE LARGE B-CELL LYMPHOMA | 2 (0.8%) | 2 (2.3%) | 0 (0%) |
| HODGKIN'S DISEASE | 4 (1.6%) | 2 (2.3%) | 0 (0%) |
| MANTLE CELL LYMPHOMA | 2 (0.8%) | 3 (3.5%) | 0 (0%) |
| MYELODYSPLASTIC SYNDROME | 15 (6.0%) | 3 (3.5%) | 0 (0%) |
| PERIPHERAL T-CELL LYMPHOMA UNSPECIFIED | 3 (1.2%) | 0 (0%) | 0 (0%) |
| PLASMA CELL MYELOMA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| PRIMARY MEDIASTINAL LARGE B-CELL LYMPHOMA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| PRIMARY MYELOFIBROSIS | 9 (3.6%) | 0 (0%) | 0 (0%) |
| T-CELL LYMPHOMA | 1 (0.4%) | 0 (0%) | 0 (0%) |
| T-CELL TYPE ACUTE LEUKAEMIA | 2 (0.8%) | 1 (1.2%) | 0 (0%) |
| PSOCN | |||
| BLOOD AND LYMPHATIC SYSTEM DISORDERS | 18 (7.1%) | 6 (7.0%) | 0 (0%) |
| NEOPLASMS BENIGN, MALIGNANT AND UNSPECIFIED (INCL CYSTS AND POLYPS) | 234 (93%) | 80 (93%) | 5 (100%) |
| TRTYPE | |||
| Haplo | 97 (38%) | 55 (64%) | 4 (80%) |
| MMUD | 49 (19%) | 12 (14%) | 0 (0%) |
| MRD | 55 (22%) | 9 (10%) | 1 (20%) |
| MUD | 51 (20%) | 10 (12%) | 0 (0%) |
| TRSOURCE | |||
| BM | 36 (14%) | 11 (13%) | 0 (0%) |
| BSC | 216 (86%) | 75 (87%) | 5 (100%) |
| CONDTYPE | |||
| MAC | 30 (12%) | 17 (20%) | 1 (20%) |
| RIC | 222 (88%) | 69 (80%) | 4 (80%) |
| RELAPYN | |||
| no | 218 (87%) | 48 (56%) | 0 (0%) |
| unknown | 3 (1.2%) | 4 (4.7%) | 0 (0%) |
| yes | 31 (12%) | 34 (40%) | 5 (100%) |
| <sup>1</sup> n (%) | |||
#add_p()
# Summary table of the selected columns of common_df_disease_chronic, with
# one table column per level of ALIVE.
# Disabled step: filter(ALIVE != "unknown") before summarising.
tbl_summary(
  select(
    common_df_disease_chronic,
    SITE, SEX, PTN, PSOCN,
    TRTYPE, TRSOURCE, CONDTYPE,
    RELAPYN, ALIVE,
    PTSTAT, SEVGRADE
  ),
  by = ALIVE
) #%>%
| Characteristic | alive, N = 167<sup>1</sup> | died, N = 27<sup>1</sup> | unknown, N = 3<sup>1</sup> |
|---|---|---|---|
| SITE | |||
| 01 | 59 (35%) | 9 (33%) | 3 (100%) |
| 02 | 95 (57%) | 16 (59%) | 0 (0%) |
| 03 | 0 (0%) | 0 (0%) | 0 (0%) |
| 04 | 13 (7.8%) | 2 (7.4%) | 0 (0%) |
| SEX | |||
| female | 88 (53%) | 12 (44%) | 1 (33%) |
| male | 79 (47%) | 15 (56%) | 2 (67%) |
| PTN | |||
| ACUTE LYMPHOCYTIC LEUKAEMIA | 35 (21%) | 5 (19%) | 3 (100%) |
| ACUTE MYELOID LEUKAEMIA | 76 (46%) | 14 (52%) | 0 (0%) |
| ACUTE MYELOMONOCYTIC LEUKAEMIA | 0 (0%) | 0 (0%) | 0 (0%) |
| ACUTE PROMYELOCYTIC LEUKAEMIA | 1 (0.6%) | 1 (3.7%) | 0 (0%) |
| ANAPLASTIC LARGE-CELL LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| ANGIOIMMUNOBLASTIC T-CELL LYMPHOMA | 1 (0.6%) | 0 (0%) | 0 (0%) |
| APLASTIC ANAEMIA | 12 (7.2%) | 2 (7.4%) | 0 (0%) |
| BURKITT'S LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| CHRONIC LYMPHOCYTIC LEUKAEMIA | 2 (1.2%) | 0 (0%) | 0 (0%) |
| CHRONIC MYELOID LEUKAEMIA | 9 (5.4%) | 3 (11%) | 0 (0%) |
| CHRONIC MYELOMONOCYTIC LEUKAEMIA | 1 (0.6%) | 0 (0%) | 0 (0%) |
| DIFFUSE LARGE B-CELL LYMPHOMA | 1 (0.6%) | 1 (3.7%) | 0 (0%) |
| HODGKIN'S DISEASE | 3 (1.8%) | 1 (3.7%) | 0 (0%) |
| MANTLE CELL LYMPHOMA | 2 (1.2%) | 0 (0%) | 0 (0%) |
| MYELODYSPLASTIC SYNDROME | 10 (6.0%) | 0 (0%) | 0 (0%) |
| PERIPHERAL T-CELL LYMPHOMA UNSPECIFIED | 2 (1.2%) | 0 (0%) | 0 (0%) |
| PLASMA CELL MYELOMA | 1 (0.6%) | 0 (0%) | 0 (0%) |
| PRIMARY MEDIASTINAL LARGE B-CELL LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| PRIMARY MYELOFIBROSIS | 8 (4.8%) | 0 (0%) | 0 (0%) |
| T-CELL LYMPHOMA | 1 (0.6%) | 0 (0%) | 0 (0%) |
| T-CELL TYPE ACUTE LEUKAEMIA | 2 (1.2%) | 0 (0%) | 0 (0%) |
| PSOCN | |||
| BLOOD AND LYMPHATIC SYSTEM DISORDERS | 12 (7.2%) | 2 (7.4%) | 0 (0%) |
| NEOPLASMS BENIGN, MALIGNANT AND UNSPECIFIED (INCL CYSTS AND POLYPS) | 155 (93%) | 25 (93%) | 3 (100%) |
| TRTYPE | |||
| Haplo | 65 (39%) | 17 (63%) | 2 (67%) |
| MMUD | 29 (17%) | 5 (19%) | 0 (0%) |
| MRD | 40 (24%) | 4 (15%) | 1 (33%) |
| MUD | 33 (20%) | 1 (3.7%) | 0 (0%) |
| TRSOURCE | |||
| BM | 24 (14%) | 5 (19%) | 0 (0%) |
| BSC | 143 (86%) | 22 (81%) | 3 (100%) |
| CONDTYPE | |||
| MAC | 22 (13%) | 5 (19%) | 1 (33%) |
| RIC | 145 (87%) | 22 (81%) | 2 (67%) |
| RELAPYN | |||
| no | 149 (89%) | 15 (56%) | 0 (0%) |
| unknown | 2 (1.2%) | 1 (3.7%) | 0 (0%) |
| yes | 16 (9.6%) | 11 (41%) | 3 (100%) |
| PTSTAT | |||
| 0 | 28 (17%) | 1 (3.7%) | 1 (33%) |
| 1 | 53 (32%) | 7 (26%) | 1 (33%) |
| 2 | 44 (26%) | 12 (44%) | 1 (33%) |
| 3 | 42 (25%) | 7 (26%) | 0 (0%) |
| SEVGRADE | |||
| mild | 43 (26%) | 5 (19%) | 1 (33%) |
| moderate | 59 (35%) | 9 (33%) | 0 (0%) |
| severe | 65 (39%) | 13 (48%) | 2 (67%) |
| <sup>1</sup> n (%) | |||
#add_p()
# Summary table of the selected columns of common_df_treatment_chronic, with
# one table column per level of STERRES.
# Disabled step: filter(STERRES != "no data") before summarising.
tbl_summary(
  select(
    common_df_treatment_chronic,
    SITE, SEX, PTN, PSOCN,
    TRTYPE, TRSOURCE, CONDTYPE,
    RELAPYN, ALIVE, PTSTAT, SEVGRADE,
    LOT, TRRESP, ATCN, STERRES
  ),
  by = STERRES
) #%>%
| Characteristic | no data, N = 205<sup>1</sup> | no resistance, N = 61<sup>1</sup> | resistance, N = 19<sup>1</sup> |
|---|---|---|---|
| SITE | |||
| 01 | 44 (21%) | 40 (66%) | 8 (42%) |
| 02 | 148 (72%) | 19 (31%) | 11 (58%) |
| 03 | 0 (0%) | 0 (0%) | 0 (0%) |
| 04 | 13 (6.3%) | 2 (3.3%) | 0 (0%) |
| SEX | |||
| female | 103 (50%) | 26 (43%) | 10 (53%) |
| male | 102 (50%) | 35 (57%) | 9 (47%) |
| PTN | |||
| ACUTE LYMPHOCYTIC LEUKAEMIA | 41 (20%) | 14 (23%) | 2 (11%) |
| ACUTE MYELOID LEUKAEMIA | 99 (48%) | 23 (38%) | 9 (47%) |
| ACUTE MYELOMONOCYTIC LEUKAEMIA | 0 (0%) | 0 (0%) | 0 (0%) |
| ACUTE PROMYELOCYTIC LEUKAEMIA | 2 (1.0%) | 0 (0%) | 0 (0%) |
| ANAPLASTIC LARGE-CELL LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| ANGIOIMMUNOBLASTIC T-CELL LYMPHOMA | 0 (0%) | 1 (1.6%) | 0 (0%) |
| APLASTIC ANAEMIA | 14 (6.8%) | 2 (3.3%) | 2 (11%) |
| BURKITT'S LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| CHRONIC LYMPHOCYTIC LEUKAEMIA | 2 (1.0%) | 0 (0%) | 0 (0%) |
| CHRONIC MYELOID LEUKAEMIA | 11 (5.4%) | 4 (6.6%) | 2 (11%) |
| CHRONIC MYELOMONOCYTIC LEUKAEMIA | 4 (2.0%) | 1 (1.6%) | 0 (0%) |
| DIFFUSE LARGE B-CELL LYMPHOMA | 5 (2.4%) | 0 (0%) | 3 (16%) |
| HODGKIN'S DISEASE | 6 (2.9%) | 1 (1.6%) | 1 (5.3%) |
| MANTLE CELL LYMPHOMA | 2 (1.0%) | 0 (0%) | 0 (0%) |
| MYELODYSPLASTIC SYNDROME | 6 (2.9%) | 6 (9.8%) | 0 (0%) |
| PERIPHERAL T-CELL LYMPHOMA UNSPECIFIED | 1 (0.5%) | 1 (1.6%) | 0 (0%) |
| PLASMA CELL MYELOMA | 1 (0.5%) | 1 (1.6%) | 0 (0%) |
| PRIMARY MEDIASTINAL LARGE B-CELL LYMPHOMA | 0 (0%) | 0 (0%) | 0 (0%) |
| PRIMARY MYELOFIBROSIS | 7 (3.4%) | 4 (6.6%) | 0 (0%) |
| T-CELL LYMPHOMA | 2 (1.0%) | 1 (1.6%) | 0 (0%) |
| T-CELL TYPE ACUTE LEUKAEMIA | 2 (1.0%) | 2 (3.3%) | 0 (0%) |
| PSOCN | |||
| BLOOD AND LYMPHATIC SYSTEM DISORDERS | 14 (6.8%) | 2 (3.3%) | 2 (11%) |
| NEOPLASMS BENIGN, MALIGNANT AND UNSPECIFIED (INCL CYSTS AND POLYPS) | 191 (93%) | 59 (97%) | 17 (89%) |
| TRTYPE | |||
| Haplo | 93 (45%) | 24 (39%) | 7 (37%) |
| MMUD | 42 (20%) | 9 (15%) | 3 (16%) |
| MRD | 34 (17%) | 19 (31%) | 3 (16%) |
| MUD | 36 (18%) | 9 (15%) | 6 (32%) |
| TRSOURCE | |||
| BM | 43 (21%) | 8 (13%) | 4 (21%) |
| BSC | 162 (79%) | 53 (87%) | 15 (79%) |
| CONDTYPE | |||
| MAC | 48 (23%) | 9 (15%) | 4 (21%) |
| RIC | 157 (77%) | 52 (85%) | 15 (79%) |
| RELAPYN | |||
| no | 170 (83%) | 54 (89%) | 16 (84%) |
| unknown | 3 (1.5%) | 0 (0%) | 0 (0%) |
| yes | 32 (16%) | 7 (11%) | 3 (16%) |
| ALIVE | |||
| alive | 173 (84%) | 51 (84%) | 15 (79%) |
| died | 30 (15%) | 9 (15%) | 4 (21%) |
| unknown | 2 (1.0%) | 1 (1.6%) | 0 (0%) |
| PTSTAT | |||
| 0 | 26 (13%) | 12 (20%) | 3 (16%) |
| 1 | 65 (32%) | 13 (21%) | 5 (26%) |
| 2 | 55 (27%) | 17 (28%) | 4 (21%) |
| 3 | 59 (29%) | 19 (31%) | 7 (37%) |
| SEVGRADE | |||
| mild | 49 (24%) | 0 (0%) | 0 (0%) |
| moderate | 65 (32%) | 20 (33%) | 6 (32%) |
| severe | 91 (44%) | 41 (67%) | 13 (68%) |
| LOT | |||
| 1 | 62 (47%) | 55 (90%) | 16 (84%) |
| 2 | 40 (31%) | 5 (8.2%) | 2 (11%) |
| 3 | 21 (16%) | 1 (1.6%) | 1 (5.3%) |
| 4 | 5 (3.8%) | 0 (0%) | 0 (0%) |
| 5 | 3 (2.3%) | 0 (0%) | 0 (0%) |
| A | 0 (0%) | 0 (0%) | 0 (0%) |
| Unknown | 74 | 0 | 0 |
| TRRESP | |||
| complete response | 29 (22%) | 24 (39%) | 0 (0%) |
| no response | 16 (12%) | 0 (0%) | 16 (84%) |
| not estimated | 13 (9.9%) | 0 (0%) | 0 (0%) |
| other | 1 (0.8%) | 0 (0%) | 0 (0%) |
| partial response | 70 (53%) | 37 (61%) | 0 (0%) |
| progression | 2 (1.5%) | 0 (0%) | 3 (16%) |
| Unknown | 74 | 0 | 0 |
| ATCN | |||
| BCR-ABL TYROSINE KINASE INHIBITORS | 7 (5.3%) | 0 (0%) | 0 (0%) |
| BRUTON'S TYROSINE KINASE (BTK) INHIBITORS | 1 (0.8%) | 0 (0%) | 0 (0%) |
| CALCINEURIN INHIBITORS | 41 (31%) | 0 (0%) | 0 (0%) |
| CORTICOSTEROIDS FOR SYSTEMIC USE | 0 (0%) | 0 (0%) | 0 (0%) |
| CORTICOSTEROIDS, DERMATOLOGICAL PREPARATIONS | 0 (0%) | 0 (0%) | 0 (0%) |
| CORTICOSTEROIDS, POTENT (GROUP III) | 0 (0%) | 1 (1.6%) | 0 (0%) |
| GLUCOCORTICOIDS | 0 (0%) | 60 (98%) | 19 (100%) |
| IMIDAZOLE DERIVATIVES | 0 (0%) | 0 (0%) | 0 (0%) |
| INTERLEUKIN INHIBITORS | 0 (0%) | 0 (0%) | 0 (0%) |
| INTERLEUKINS | 4 (3.1%) | 0 (0%) | 0 (0%) |
| JANUS-ASSOCIATED KINASE (JAK) INHIBITORS | 50 (38%) | 0 (0%) | 0 (0%) |
| OTHER IMMUNOSTIMULANTS | 0 (0%) | 0 (0%) | 0 (0%) |
| OTHER IMMUNOSUPPRESSANTS | 1 (0.8%) | 0 (0%) | 0 (0%) |
| SELECTIVE IMMUNOSUPPRESSANTS | 21 (16%) | 0 (0%) | 0 (0%) |
| TUMOR NECROSIS FACTOR ALPHA (TNF-) INHIBITORS | 6 (4.6%) | 0 (0%) | 0 (0%) |
| Unknown | 74 | 0 | 0 |
| <sup>1</sup> n (%) | |||
#add_p()
====================================================================== Рабочее, но неоформленное
Развлечения с compareGroups
# library(compareGroups)
# compareGroups(TDINTER ~ .,
# data = common_df_treatment_chronic,
# method = c(triglyc = 2))
Описательные статистики рефрактерности: отберем пациентов, которые получали терапию кортикостероидами, и рассчитаем описательные статистики отдельно для группы с развившейся рефрактерностью и для группы без неё.
# common_df_treatment %>%
# filter(GVHDTRYN == "yes",
# ATCN %in% c("GLUCOCORTICOIDS",
# "CORTICOSTEROIDS FOR SYSTEMIC USE",
# "CORTICOSTEROIDS, POTENT (GROUP III)")) %>%
# select(SEX, TUTERM, COND, CONDTYPE, DRUGN, chronic_Joints,
# ALIVE, GVHDAGE, TRIND, RESPEV, STERRES) %>%
# tbl_summary(by = STERRES) #%>%
#add_p()
Нерабочая часть
# common_df_treatment <- common_df_treatment %>%
# mutate(
# GVHDTRYN = GVHDTRYN %>% as.factor(),
# LOT = LOT %>% as.factor(),
# ATCC = ATCC %>% as.factor(),
# ATCN = ATCN %>% as.factor(),
# TRRESP = TRRESP %>% as.factor(),
# RESPEV = RESPEV %>% as.factor(),
# TRONG = TRONG %>% as.factor(),
# TRSTDTC = format(as.Date(TRSTDTC, format = "%d/%m/%Y"), "%d.%m.%Y"),
# TRENDTC = format(as.Date(TRENDTC, format = "%d/%m/%Y"), "%d.%m.%Y")
# )
# summary(common_df_treatment)
Теперь единица наблюдения – это случай получения одного препарата у одного пациента. Более агрегированная статистика может быть получена при группировке по нужной переменной (номер пациента, тип РТПХ или оба).
Добавим информацию о факте резистентности.
Не очень понятно, как оценивать, какую линию терапии не смогли оценить/какая не показала резистентности, поскольку для подобных ситуаций не указан номер линии терапии. Оставим только данные о факте резистентности.
# common_df_resist <- left_join(common_df_treatment,
# fnet$RS_20230119_120304 %>%
# filter(REFSTYN == "yes") %>%
# rename(LOT = REFSTLOT) %>%
# select(!SITE),
# by = c("SUBJID", "LOT"))
# После этого сделать проверку по дате: резистентность не может наступить раньше факта лечения.
# Вообще везде сделать проверку по дате